library(EnsDb.Hsapiens.v86)
## Loading required package: ensembldb
## Loading required package: BiocGenerics
##
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:stats':
##
## IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
##
## anyDuplicated, aperm, append, as.data.frame, basename, cbind,
## colnames, dirname, do.call, duplicated, eval, evalq, Filter, Find,
## get, grep, grepl, intersect, is.unsorted, lapply, Map, mapply,
## match, mget, order, paste, pmax, pmax.int, pmin, pmin.int,
## Position, rank, rbind, Reduce, rownames, sapply, setdiff, sort,
## table, tapply, union, unique, unsplit, which.max, which.min
## Loading required package: GenomicRanges
## Loading required package: stats4
## Loading required package: S4Vectors
##
## Attaching package: 'S4Vectors'
## The following objects are masked from 'package:base':
##
## expand.grid, I, unname
## Loading required package: IRanges
## Loading required package: GenomeInfoDb
## Loading required package: GenomicFeatures
## Loading required package: AnnotationDbi
## Loading required package: Biobase
## Welcome to Bioconductor
##
## Vignettes contain introductory material; view with
## 'browseVignettes()'. To cite Bioconductor, see
## 'citation("Biobase")', and for packages 'citation("pkgname")'.
## Loading required package: AnnotationFilter
##
## Attaching package: 'ensembldb'
## The following object is masked from 'package:stats':
##
## filter
library(Signac)
library(Seurat)
## Attaching SeuratObject
library(GenomeInfoDb)
library(ggplot2)
library(patchwork)
library(Matrix)
##
## Attaching package: 'Matrix'
## The following object is masked from 'package:S4Vectors':
##
## expand
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:ensembldb':
##
## filter, select
## The following object is masked from 'package:AnnotationDbi':
##
## select
## The following object is masked from 'package:Biobase':
##
## combine
## The following objects are masked from 'package:GenomicRanges':
##
## intersect, setdiff, union
## The following object is masked from 'package:GenomeInfoDb':
##
## intersect
## The following objects are masked from 'package:IRanges':
##
## collapse, desc, intersect, setdiff, slice, union
## The following objects are masked from 'package:S4Vectors':
##
## first, intersect, rename, setdiff, setequal, union
## The following objects are masked from 'package:BiocGenerics':
##
## combine, intersect, setdiff, union
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(hdf5r)
##
## Attaching package: 'hdf5r'
## The following object is masked from 'package:GenomicRanges':
##
## values
## The following object is masked from 'package:S4Vectors':
##
## values
library(future)
##
## Attaching package: 'future'
## The following object is masked from 'package:AnnotationFilter':
##
## value
library(ggplot2)
library(pheatmap)
options(future.globals.maxSize = 100000 * 1024^2)
# Create Seurat objects with RNA data only from the Multi-ome data (Sole-Boldo et al., 2022)
matrix_dir = "/scratch/groups/khavari/users/andrewji/sb_multiome/Sole-Boldo_2022_Multiome/"
s_dir = c("S1/GSM6284671_S1_filtered_feature_bc_matrix.h5","S2/GSM6284672_S2_filtered_feature_bc_matrix.h5")
all_rna_data = list()
for (i in 1:length(s_dir))
{
mat = Read10X_h5(paste(matrix_dir,s_dir[i],sep=""))
mat_rna = mat[[1]]
all_rna_data[[i]] = mat_rna
}
## Genome matrix has multiple modalities, returning a list of matrices for this genome
## Genome matrix has multiple modalities, returning a list of matrices for this genome
all_obj = list()
allnames = c("S1","S2")
for (i in 1:length(all_rna_data))
{
obj <- CreateSeuratObject(counts = all_rna_data[[i]], project = allnames[i])
obj[["percent.mt"]] <- PercentageFeatureSet(obj, pattern = "^MT-")
obj = subset(obj, nFeature_RNA >= 200 & percent.mt < 10)
all_obj[[i]] = obj
}
# Normalize and identify variable features for each dataset independently
all_obj <- lapply(X = all_obj, FUN = function(x) {
x <- NormalizeData(x)
x <- FindVariableFeatures(x, selection.method = "vst", nfeatures = 2000)
})
# Select features that are repeatedly variable across datasets for integration
features <- SelectIntegrationFeatures(object.list = all_obj)
s_anchors <- FindIntegrationAnchors(object.list = all_obj, anchor.features = features)
## Warning in CheckDuplicateCellNames(object.list = object.list): Some cell names
## are duplicated across objects provided. Renaming to enforce unique cell names.
## Scaling features for provided objects
## Finding all pairwise anchors
## Running CCA
## Merging objects
## Finding neighborhoods
## Finding anchors
## Found 8691 anchors
## Filtering anchors
## Retained 4083 anchors
# Integrate both samples
s_comb <- IntegrateData(anchorset = s_anchors)
## Merging dataset 2 into 1
## Extracting anchors for merged samples
## Finding integration vectors
## Finding integration vector weights
## Integrating data
# Perform dimensionality reduction on integrated object
DefaultAssay(s_comb) <- "integrated"
# Run the standard workflow for visualization and clustering
s_comb <- ScaleData(s_comb, verbose = FALSE)
s_comb <- RunPCA(s_comb, npcs = 30, verbose = FALSE)
s_comb <- RunUMAP(s_comb, reduction = "pca", dims = 1:30)
## Warning: The default method for RunUMAP has changed from calling Python UMAP via reticulate to the R-native UWOT using the cosine metric
## To use Python UMAP via reticulate, set umap.method to 'umap-learn' and metric to 'correlation'
## This message will be shown once per session
## 08:53:26 UMAP embedding parameters a = 0.9922 b = 1.112
## 08:53:26 Read 6661 rows and found 30 numeric columns
## 08:53:26 Using Annoy for neighbor search, n_neighbors = 30
## 08:53:26 Building Annoy index with metric = cosine, n_trees = 50
## 0% 10 20 30 40 50 60 70 80 90 100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 08:53:27 Writing NN index file to temp file /tmp/RtmpwYMQx5/file37542f22948e
## 08:53:27 Searching Annoy index using 1 thread, search_k = 3000
## 08:53:29 Annoy recall = 100%
## 08:53:31 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 30
## 08:53:34 Initializing from normalized Laplacian + noise (using irlba)
## 08:53:34 Commencing optimization for 500 epochs, with 282304 positive edges
## 08:53:52 Optimization finished
s_comb <- FindNeighbors(s_comb, reduction = "pca", dims = 1:30)
## Computing nearest neighbor graph
## Computing SNN
s_comb <- FindClusters(s_comb, resolution = 0.1)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 6661
## Number of edges: 258901
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9453
## Number of communities: 7
## Elapsed time: 0 seconds
s_comb <- FindClusters(s_comb, resolution = 0.2)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 6661
## Number of edges: 258901
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9170
## Number of communities: 9
## Elapsed time: 0 seconds
s_comb <- FindClusters(s_comb, resolution = 0.4)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 6661
## Number of edges: 258901
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8794
## Number of communities: 11
## Elapsed time: 0 seconds
s_comb <- FindClusters(s_comb, resolution = 0.5)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 6661
## Number of edges: 258901
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8620
## Number of communities: 12
## Elapsed time: 0 seconds
s_comb <- FindClusters(s_comb, resolution = 0.6)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 6661
## Number of edges: 258901
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8484
## Number of communities: 12
## Elapsed time: 0 seconds
s_comb <- FindClusters(s_comb, resolution = 0.8)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 6661
## Number of edges: 258901
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8226
## Number of communities: 15
## Elapsed time: 0 seconds
# UMAP labeled by sample
DimPlot(s_comb, reduction = "umap", group.by = "orig.ident")

# UMAP labeled by clusters at res 0.8
DimPlot(s_comb, reduction = "umap", label = TRUE, repel = TRUE)

# Check expression of markers
marker_genes = c("KRT5","KRT14","COL17A1","KRT1","KRT10","MKI67")
marker_genes2 = c("IVL","KLK7","SOX9","HLA-DRA","MLANA","PTPRC")
FeaturePlot(object = s_comb, features = marker_genes, cols = c("lightgrey", "blue"), pt.size = 0.25, order=T, ncol = 3)
## Warning: Could not find KRT1 in the default search locations, found in RNA assay
## instead
## Warning: Could not find KRT10 in the default search locations, found in RNA
## assay instead

FeaturePlot(object = s_comb, features = marker_genes2, cols = c("lightgrey", "blue"), pt.size = 0.25, order=T, ncol = 3)

# Subset IFE cells and re-integrate
Idents(s_comb) = "integrated_snn_res.0.8"
s_comb_filt = subset(s_comb, idents = c(0:6,8:10,14))
# Add ATAC data to object
# Read in counts for ATAC
all_atac_data = list()
for (i in 1:length(s_dir))
{
mat = Read10X_h5(paste(matrix_dir,s_dir[i],sep=""))
mat_atac = mat[[2]]
all_atac_data[[i]] = mat_atac
}
## Genome matrix has multiple modalities, returning a list of matrices for this genome
## Genome matrix has multiple modalities, returning a list of matrices for this genome
s1_atac_counts = all_atac_data[[1]]
s1_grange.counts <- StringToGRanges(rownames(s1_atac_counts), sep = c(":", "-"))
s1_grange.use <- seqnames(s1_grange.counts) %in% standardChromosomes(s1_grange.counts)
s1_atac_counts <- s1_atac_counts[as.vector(s1_grange.use), ]
s1_annotations <- GetGRangesFromEnsDb(ensdb = EnsDb.Hsapiens.v86)
## Warning in .merge_two_Seqinfo_objects(x, y): The 2 combined objects have no sequence levels in common. (Use
## suppressWarnings() to suppress this warning.)
## Warning in .merge_two_Seqinfo_objects(x, y): The 2 combined objects have no sequence levels in common. (Use
## suppressWarnings() to suppress this warning.)
## Warning in .merge_two_Seqinfo_objects(x, y): The 2 combined objects have no sequence levels in common. (Use
## suppressWarnings() to suppress this warning.)
## Warning in .merge_two_Seqinfo_objects(x, y): The 2 combined objects have no sequence levels in common. (Use
## suppressWarnings() to suppress this warning.)
## Warning in .merge_two_Seqinfo_objects(x, y): The 2 combined objects have no sequence levels in common. (Use
## suppressWarnings() to suppress this warning.)
## Warning in .merge_two_Seqinfo_objects(x, y): The 2 combined objects have no sequence levels in common. (Use
## suppressWarnings() to suppress this warning.)
## Warning in .merge_two_Seqinfo_objects(x, y): The 2 combined objects have no sequence levels in common. (Use
## suppressWarnings() to suppress this warning.)
## Warning in .merge_two_Seqinfo_objects(x, y): The 2 combined objects have no sequence levels in common. (Use
## suppressWarnings() to suppress this warning.)
## Warning in .merge_two_Seqinfo_objects(x, y): The 2 combined objects have no sequence levels in common. (Use
## suppressWarnings() to suppress this warning.)
## Warning in .merge_two_Seqinfo_objects(x, y): The 2 combined objects have no sequence levels in common. (Use
## suppressWarnings() to suppress this warning.)
## Warning in .merge_two_Seqinfo_objects(x, y): The 2 combined objects have no sequence levels in common. (Use
## suppressWarnings() to suppress this warning.)
## Warning in .merge_two_Seqinfo_objects(x, y): The 2 combined objects have no sequence levels in common. (Use
## suppressWarnings() to suppress this warning.)
## Warning in .merge_two_Seqinfo_objects(x, y): The 2 combined objects have no sequence levels in common. (Use
## suppressWarnings() to suppress this warning.)
## Warning in .merge_two_Seqinfo_objects(x, y): The 2 combined objects have no sequence levels in common. (Use
## suppressWarnings() to suppress this warning.)
## Warning in .merge_two_Seqinfo_objects(x, y): The 2 combined objects have no sequence levels in common. (Use
## suppressWarnings() to suppress this warning.)
## Warning in .merge_two_Seqinfo_objects(x, y): The 2 combined objects have no sequence levels in common. (Use
## suppressWarnings() to suppress this warning.)
## Warning in .merge_two_Seqinfo_objects(x, y): The 2 combined objects have no sequence levels in common. (Use
## suppressWarnings() to suppress this warning.)
## Warning in .merge_two_Seqinfo_objects(x, y): The 2 combined objects have no sequence levels in common. (Use
## suppressWarnings() to suppress this warning.)
## Warning in .merge_two_Seqinfo_objects(x, y): The 2 combined objects have no sequence levels in common. (Use
## suppressWarnings() to suppress this warning.)
## Warning in .merge_two_Seqinfo_objects(x, y): The 2 combined objects have no sequence levels in common. (Use
## suppressWarnings() to suppress this warning.)
## Warning in .merge_two_Seqinfo_objects(x, y): The 2 combined objects have no sequence levels in common. (Use
## suppressWarnings() to suppress this warning.)
## Warning in .merge_two_Seqinfo_objects(x, y): The 2 combined objects have no sequence levels in common. (Use
## suppressWarnings() to suppress this warning.)
## Warning in .merge_two_Seqinfo_objects(x, y): The 2 combined objects have no sequence levels in common. (Use
## suppressWarnings() to suppress this warning.)
## Warning in .merge_two_Seqinfo_objects(x, y): The 2 combined objects have no sequence levels in common. (Use
## suppressWarnings() to suppress this warning.)
seqlevelsStyle(s1_annotations) <- 'UCSC'
genome(s1_annotations) <- "hg38"
frag.files = c("S1/GSM6284669_S1_atac_fragments.tsv.gz","S2/GSM6284670_S2_atac_fragments.tsv.gz")
s1_frag.file <- paste(matrix_dir,frag.files[1],sep="")
s1_chrom_assay <- CreateChromatinAssay(
counts = s1_atac_counts,
sep = c(":", "-"),
genome = 'hg38',
fragments = s1_frag.file,
min.cells = 10,
annotation = s1_annotations
)
## Computing hash
s1_atac <- CreateSeuratObject(
counts = s1_chrom_assay,
assay = "peaks",
#meta.data = metadata
)
## Warning: Keys should be one or more alphanumeric characters followed by an
## underscore, setting key from peaks to peaks_
s2_atac_counts = all_atac_data[[2]]
s2_grange.counts <- StringToGRanges(rownames(s2_atac_counts), sep = c(":", "-"))
s2_grange.use <- seqnames(s2_grange.counts) %in% standardChromosomes(s2_grange.counts)
s2_atac_counts <- s2_atac_counts[as.vector(s2_grange.use), ]
s2_annotations <- GetGRangesFromEnsDb(ensdb = EnsDb.Hsapiens.v86)
## Warning in .merge_two_Seqinfo_objects(x, y): The 2 combined objects have no sequence levels in common. (Use
## suppressWarnings() to suppress this warning.)
## Warning in .merge_two_Seqinfo_objects(x, y): The 2 combined objects have no sequence levels in common. (Use
## suppressWarnings() to suppress this warning.)
## Warning in .merge_two_Seqinfo_objects(x, y): The 2 combined objects have no sequence levels in common. (Use
## suppressWarnings() to suppress this warning.)
## Warning in .merge_two_Seqinfo_objects(x, y): The 2 combined objects have no sequence levels in common. (Use
## suppressWarnings() to suppress this warning.)
## Warning in .merge_two_Seqinfo_objects(x, y): The 2 combined objects have no sequence levels in common. (Use
## suppressWarnings() to suppress this warning.)
## Warning in .merge_two_Seqinfo_objects(x, y): The 2 combined objects have no sequence levels in common. (Use
## suppressWarnings() to suppress this warning.)
## Warning in .merge_two_Seqinfo_objects(x, y): The 2 combined objects have no sequence levels in common. (Use
## suppressWarnings() to suppress this warning.)
## Warning in .merge_two_Seqinfo_objects(x, y): The 2 combined objects have no sequence levels in common. (Use
## suppressWarnings() to suppress this warning.)
## Warning in .merge_two_Seqinfo_objects(x, y): The 2 combined objects have no sequence levels in common. (Use
## suppressWarnings() to suppress this warning.)
## Warning in .merge_two_Seqinfo_objects(x, y): The 2 combined objects have no sequence levels in common. (Use
## suppressWarnings() to suppress this warning.)
## Warning in .merge_two_Seqinfo_objects(x, y): The 2 combined objects have no sequence levels in common. (Use
## suppressWarnings() to suppress this warning.)
## Warning in .merge_two_Seqinfo_objects(x, y): The 2 combined objects have no sequence levels in common. (Use
## suppressWarnings() to suppress this warning.)
## Warning in .merge_two_Seqinfo_objects(x, y): The 2 combined objects have no sequence levels in common. (Use
## suppressWarnings() to suppress this warning.)
## Warning in .merge_two_Seqinfo_objects(x, y): The 2 combined objects have no sequence levels in common. (Use
## suppressWarnings() to suppress this warning.)
## Warning in .merge_two_Seqinfo_objects(x, y): The 2 combined objects have no sequence levels in common. (Use
## suppressWarnings() to suppress this warning.)
## Warning in .merge_two_Seqinfo_objects(x, y): The 2 combined objects have no sequence levels in common. (Use
## suppressWarnings() to suppress this warning.)
## Warning in .merge_two_Seqinfo_objects(x, y): The 2 combined objects have no sequence levels in common. (Use
## suppressWarnings() to suppress this warning.)
## Warning in .merge_two_Seqinfo_objects(x, y): The 2 combined objects have no sequence levels in common. (Use
## suppressWarnings() to suppress this warning.)
## Warning in .merge_two_Seqinfo_objects(x, y): The 2 combined objects have no sequence levels in common. (Use
## suppressWarnings() to suppress this warning.)
## Warning in .merge_two_Seqinfo_objects(x, y): The 2 combined objects have no sequence levels in common. (Use
## suppressWarnings() to suppress this warning.)
## Warning in .merge_two_Seqinfo_objects(x, y): The 2 combined objects have no sequence levels in common. (Use
## suppressWarnings() to suppress this warning.)
## Warning in .merge_two_Seqinfo_objects(x, y): The 2 combined objects have no sequence levels in common. (Use
## suppressWarnings() to suppress this warning.)
## Warning in .merge_two_Seqinfo_objects(x, y): The 2 combined objects have no sequence levels in common. (Use
## suppressWarnings() to suppress this warning.)
## Warning in .merge_two_Seqinfo_objects(x, y): The 2 combined objects have no sequence levels in common. (Use
## suppressWarnings() to suppress this warning.)
seqlevelsStyle(s2_annotations) <- 'UCSC'
genome(s2_annotations) <- "hg38"
s2_frag.file <- paste(matrix_dir,frag.files[2],sep="")
s2_chrom_assay <- CreateChromatinAssay(
counts = s2_atac_counts,
sep = c(":", "-"),
genome = 'hg38',
fragments = s2_frag.file,
min.cells = 10,
annotation = s2_annotations
)
## Computing hash
s2_atac <- CreateSeuratObject(
counts = s2_chrom_assay,
assay = "peaks",
#meta.data = metadata
)
## Warning: Keys should be one or more alphanumeric characters followed by an
## underscore, setting key from peaks to peaks_
DefaultAssay(s1_atac) = "peaks"
DefaultAssay(s2_atac) = "peaks"
# convert to genomic ranges
gr_s1 = granges(s1_atac)
gr_s2 = granges(s2_atac)
# Create a unified set of peaks to quantify in each dataset
combined.peaks <- reduce(x = c(gr_s1, gr_s2))
# Filter out bad peaks based on length
peakwidths <- width(combined.peaks)
combined.peaks <- combined.peaks[peakwidths < 10000 & peakwidths > 20]
combined.peaks
## GRanges object with 80844 ranges and 0 metadata columns:
## seqnames ranges strand
## <Rle> <IRanges> <Rle>
## [1] chr1 180790-181751 *
## [2] chr1 183867-184813 *
## [3] chr1 186562-187409 *
## [4] chr1 190968-191900 *
## [5] chr1 629432-630398 *
## ... ... ... ...
## [80840] chrY 56873534-56874398 *
## [80841] chrY 56877137-56878338 *
## [80842] chrY 56879505-56880375 *
## [80843] chrY 56883603-56884517 *
## [80844] chrY 56885148-56886181 *
## -------
## seqinfo: 24 sequences from an unspecified genome; no seqlengths
# Create new merged ATAC object
combined <- merge(
x = s1_atac,
y = s2_atac,
add.cell.ids = c("S1","S2")
)
##
## Binding matrix rows
##
## Binding matrix rows
##
## Binding matrix rows
##
## Binding matrix rows
combined[["peaks"]]
## ChromatinAssay data with 80844 features for 6793 cells
## Variable features: 0
## Genome: hg38
## Annotation present: TRUE
## Motifs present: FALSE
## Fragment files: 2
# Match barcode names and filter combined for overlapping cells in s_comb_filt
s_comb_mo = s_comb_filt
col_split = strsplit(colnames(s_comb_mo),"_")
new_col = paste("S",unlist(lapply(col_split,"[[",2)),"_",unlist(lapply(col_split,"[[",1)),sep="")
s_comb_mo = RenameCells(s_comb_mo, new.names = new_col)
combined_filt = subset(combined, cells = colnames(s_comb_mo))
s_comb_mo[["ATAC"]] = combined_filt[["peaks"]]
# Dimensionality reduction with ATAC
DefaultAssay(s_comb_mo) <- "ATAC"
s_comb_mo <- RunTFIDF(s_comb_mo)
## Performing TF-IDF normalization
s_comb_mo <- FindTopFeatures(s_comb_mo, min.cutoff = 'q0')
s_comb_mo <- RunSVD(s_comb_mo)
## Running SVD
## Scaling cell embeddings
s_comb_mo <- RunUMAP(s_comb_mo, reduction = 'lsi', dims = 2:50, reduction.name = "umap.atac", reduction.key = "atacUMAP_")
## 09:00:54 UMAP embedding parameters a = 0.9922 b = 1.112
## 09:00:54 Read 6108 rows and found 49 numeric columns
## 09:00:54 Using Annoy for neighbor search, n_neighbors = 30
## 09:00:54 Building Annoy index with metric = cosine, n_trees = 50
## 0% 10 20 30 40 50 60 70 80 90 100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 09:00:55 Writing NN index file to temp file /tmp/RtmpwYMQx5/file37544f079aac
## 09:00:55 Searching Annoy index using 1 thread, search_k = 3000
## 09:00:57 Annoy recall = 100%
## 09:00:59 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 30
## 09:01:03 Initializing from normalized Laplacian + noise (using irlba)
## 09:01:03 Commencing optimization for 500 epochs, with 244916 positive edges
## 09:01:20 Optimization finished
# Integrate ATAC objects
obj_list_atac = SplitObject(s_comb_mo,split.by = "orig.ident")
# find integration anchors
integration.anchors <- FindIntegrationAnchors(
object.list = obj_list_atac,
anchor.features = rownames(s_comb_mo),
reduction = "rlsi",
dims = 2:30
)
## Computing within dataset neighborhoods
## Finding all pairwise anchors
## Warning: No filtering performed if passing to data rather than counts
## Projecting new data onto SVD
## Projecting new data onto SVD
## Finding neighborhoods
## Finding anchors
## Found 1942 anchors
# Create new object with integrated LSI embeddings
s_comb_atac = s_comb_mo
s_comb_atac <- IntegrateEmbeddings(
anchorset = integration.anchors,
reductions = s_comb_atac[["lsi"]],
new.reduction.name = "integrated_lsi",
dims.to.integrate = 1:30
)
## Merging dataset 1 into 2
## Extracting anchors for merged samples
## Finding integration vectors
## Finding integration vector weights
## Integrating data
# Integrate RNA objects
DefaultAssay(s_comb_mo) = "RNA"
obj_list = SplitObject(s_comb_mo,split.by = "orig.ident")
# normalize and identify variable features for each dataset independently
obj_list <- lapply(X = obj_list, FUN = function(x) {
DefaultAssay(x) = "RNA"
x <- NormalizeData(x)
x <- FindVariableFeatures(x, selection.method = "vst", nfeatures = 2000)
})
features <- SelectIntegrationFeatures(object.list = obj_list)
s_anchors <- FindIntegrationAnchors(object.list = obj_list, anchor.features = features)
## Scaling features for provided objects
## Finding all pairwise anchors
## Running CCA
## Merging objects
## Finding neighborhoods
## Finding anchors
## Found 8587 anchors
## Filtering anchors
## Retained 3515 anchors
s_comb_mo <- IntegrateData(anchorset = s_anchors, dims = 1:30)
## Merging dataset 1 into 2
## Extracting anchors for merged samples
## Finding integration vectors
## Finding integration vector weights
## Integrating data
DefaultAssay(s_comb_mo) = "integrated"
s_comb_mo <- ScaleData(s_comb_mo, verbose = FALSE)
s_comb_mo <- RunPCA(s_comb_mo, npcs = 30, verbose = FALSE)
s_comb_mo <- RunUMAP(s_comb_mo, reduction = "pca", dims = 1:30, reduction.name = 'umap.rna', reduction.key = 'rnaUMAP_')
## 09:04:39 UMAP embedding parameters a = 0.9922 b = 1.112
## 09:04:39 Read 6108 rows and found 30 numeric columns
## 09:04:39 Using Annoy for neighbor search, n_neighbors = 30
## 09:04:39 Building Annoy index with metric = cosine, n_trees = 50
## 0% 10 20 30 40 50 60 70 80 90 100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 09:04:40 Writing NN index file to temp file /tmp/RtmpwYMQx5/file3754263916ba
## 09:04:40 Searching Annoy index using 1 thread, search_k = 3000
## 09:04:42 Annoy recall = 100%
## 09:04:44 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 30
## 09:04:47 Initializing from normalized Laplacian + noise (using irlba)
## 09:04:47 Commencing optimization for 500 epochs, with 260074 positive edges
## 09:05:03 Optimization finished
# RNA UMAP by sample identity
p1 = DimPlot(s_comb_mo, reduction = "umap.rna", label = F, group.by = "orig.ident") + NoLegend() + NoAxes()
print(p1)

# save png
png("s_comb_mo_predicted_id_umap_rna_orig.ident.png",width = 5.5, height = 5, res = 300, units = "in")
print(p1)
dev.off()
## png
## 2
# Add ATAC integration to original object
s_comb_mo[["integrated_lsi"]] = s_comb_atac[["integrated_lsi"]]
# Run UMAP on integrated ATAC objects
s_comb_mo <- RunUMAP(s_comb_mo, reduction = 'integrated_lsi', dims = 2:50, reduction.name = "umap.atac", reduction.key = "atacUMAP_")
## 09:05:07 UMAP embedding parameters a = 0.9922 b = 1.112
## 09:05:07 Read 6108 rows and found 49 numeric columns
## 09:05:07 Using Annoy for neighbor search, n_neighbors = 30
## 09:05:07 Building Annoy index with metric = cosine, n_trees = 50
## 0% 10 20 30 40 50 60 70 80 90 100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 09:05:08 Writing NN index file to temp file /tmp/RtmpwYMQx5/file37541898f89a
## 09:05:08 Searching Annoy index using 1 thread, search_k = 3000
## 09:05:10 Annoy recall = 100%
## 09:05:12 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 30
## 09:05:16 Initializing from normalized Laplacian + noise (using irlba)
## 09:05:16 Commencing optimization for 500 epochs, with 259218 positive edges
## 09:05:34 Optimization finished
setwd("/scratch/groups/khavari/users/andrewji/sb_multiome/s_comb_mo/render")
# ATAC UMAP labeled by sample identity
p1 = DimPlot(s_comb_mo, reduction = "umap.atac", label = F, group.by = "orig.ident") + NoLegend() + NoAxes()
print(p1)

# save png
png("s_comb_mo_predicted_id_umap_atac_origident.png",width = 5.5, height = 5, res = 300, units = "in")
print(p1)
dev.off()
## png
## 2
# Predict AJ cohort IFE clusters
DefaultAssay(s_comb_mo) = "integrated"
# Load AJ cohort IFE object
kc_int_filt = readRDS("/oak/stanford/groups/khavari/users/andrewji/normal_skin/kc_norm/kc_int/kc_int_filt/kc_int_filt.Rds")
Idents(kc_int_filt) = "integrated_snn_res.0.4"
kc.anchors <- FindTransferAnchors(reference = kc_int_filt, query = s_comb_mo,
dims = 1:30, k.filter=NA)
## Performing PCA on the provided reference using 622 features as input.
## Projecting cell embeddings
## Finding neighborhoods
## Finding anchors
## Found 1956 anchors
kc_predictions <- TransferData(anchorset = kc.anchors, refdata = kc_int_filt$integrated_snn_res.0.4,
dims = 1:30)
## Finding integration vectors
## Finding integration vector weights
## Predicting cell labels
s_comb_mo <- AddMetaData(s_comb_mo, metadata = kc_predictions)
Idents(s_comb_mo) = "predicted.id"
p = DimPlot(object = s_comb_mo, group.by = "predicted.id", label = TRUE)
print(p)

# Plot by predicted clusters with matched colors as AJ cohort IFE (0 cells with predicted cluster 11)
Idents(s_comb_mo) = "predicted.id"
my_levels = c(0:10)
match_cols = scales::hue_pal()(12)[1:11]
Idents(s_comb_mo) = factor(x = Idents(s_comb_mo), levels = my_levels)
# RNA UMAP and save png
p1 <- DimPlot(s_comb_mo, reduction = "umap.rna", label = TRUE, cols = match_cols, label.size = 8) + NoLegend() + NoAxes()
print(p1)

png("s_comb_mo_predicted_id_umap_rna_match_color.png",width = 5.5, height = 5, res = 300, units = "in")
print(p1)
dev.off()
## png
## 2
# ATAC UMAP and save png
p1 <- DimPlot(s_comb_mo, reduction = "umap.atac", label = TRUE, cols = match_cols, label.size = 8) + NoLegend() + NoAxes()
print(p1)

png("s_comb_mo_predicted_id_umap_atac_match_color.png",width = 5.5, height = 5, res = 300, units = "in")
print(p1)
dev.off()
## png
## 2
# Highlight cluster 2 predicted cells (Spinous II cells)
# RNA UMAP
p = DimPlot(object = s_comb_mo, reduction = "umap.rna", group.by = "predicted.id", label = F, cells.highlight = WhichCells(s_comb_mo,idents=2)) + NoAxes() + NoLegend()
print(p)

# save png
png("s_comb_mo_umap_rna_highlight_predicted_2.png",width = 5, height = 5, res = 300, units = "in")
print(p)
dev.off()
## png
## 2
# ATAC UMAP
p = DimPlot(object = s_comb_mo, reduction = "umap.atac", group.by = "predicted.id", label = F, cells.highlight = WhichCells(s_comb_mo,idents=2)) + NoAxes() + NoLegend()
print(p)

# save png
png("s_comb_mo_umap_atac_highlight_predicted_2.png",width = 5, height = 5, res = 300, units = "in")
print(p)
dev.off()
## png
## 2
# Feature plots for predicted cluster scores
predicted_id = paste("prediction.score.",0:11,sep="")
p = FeaturePlot(object = s_comb_mo, features = predicted_id[1:6], cols = c("lightgrey", "blue"), pt.size = 0.25, order=T, ncol = 3)
print(p)

p = FeaturePlot(object = s_comb_mo, features = predicted_id[7:12], cols = c("lightgrey", "blue"), pt.size = 0.25, order=T, ncol = 3)
## Warning in FeaturePlot(object = s_comb_mo, features = predicted_id[7:12], : All
## cells have the same value (0) of prediction.score.11.
print(p)

# save png
png("s_comb_mo_featureplot_predicted_id_scores.png",width = 10, height = 14, res = 300, units = "in")
FeaturePlot(object = s_comb_mo, features = predicted_id[1:6], cols = c("lightgrey", "blue"), pt.size = 0.25, order=T, ncol = 3)
dev.off()
## png
## 2
# Order clusters from basal to granular
my_levels = c(3,4,6,5,8,7,2,0,1,9,10)
Idents(s_comb_mo) = factor(x = Idents(s_comb_mo), levels = my_levels)
# Coverage plots for major predicted basal, spinous, and granular clusters
p = CoveragePlot(s_comb_mo, region = "KRT5", extend.upstream = 2000, extend.downstream = 2000, features = "KRT5", assay = 'ATAC', expression.assay = 'RNA', peaks = T, idents = c(3,4,2,1,10))
p = p & scale_fill_manual(values = match_cols[c(4,5,3,2,11)])
## Scale for fill is already present.
## Adding another scale for fill, which will replace the existing scale.
## Scale for fill is already present.
## Adding another scale for fill, which will replace the existing scale.
print(p)

p = CoveragePlot(s_comb_mo, region = "KRT14", extend.upstream = 2000, extend.downstream = 2000, features = "KRT14", assay = 'ATAC', expression.assay = 'RNA', peaks = T, idents = c(3,4,2,1,10))
p = p & scale_fill_manual(values = match_cols[c(4,5,3,2,11)])
## Scale for fill is already present.
## Adding another scale for fill, which will replace the existing scale.
## Scale for fill is already present.
## Adding another scale for fill, which will replace the existing scale.
print(p)

p = CoveragePlot(s_comb_mo, region = "KRT1", extend.upstream = 2000, extend.downstream = 2000, features = "KRT1", assay = 'ATAC', expression.assay = 'RNA', peaks = T, idents = c(3,4,2,1,10))
p = p & scale_fill_manual(values = match_cols[c(4,5,3,2,11)])
## Scale for fill is already present.
## Adding another scale for fill, which will replace the existing scale.
## Scale for fill is already present.
## Adding another scale for fill, which will replace the existing scale.
print(p)

p = CoveragePlot(s_comb_mo, region = "KRT10", extend.upstream = 2000, extend.downstream = 2000, features = "KRT10", assay = 'ATAC', expression.assay = 'RNA', peaks = T, idents = c(3,4,2,1,10))
p = p & scale_fill_manual(values = match_cols[c(4,5,3,2,11)])
## Scale for fill is already present.
## Adding another scale for fill, which will replace the existing scale.
## Scale for fill is already present.
## Adding another scale for fill, which will replace the existing scale.
print(p)
## Warning: Removed 7 rows containing missing values (`geom_segment()`).

# Get differentially accessible peaks (code run separately to save time)
# Idents(s_comb_mo) = "predicted.id"
# da_peaks_all <- FindAllMarkers(
# object = s_comb_mo,
# test.use = 'LR',
# latent.vars = 'atac_peak_region_fragments',
# min.pct = 0.05
# )
# write.table(da_peaks_all,file="s_comb_mo_kc_int_filt_res0.4_predict_da_peaks.csv",sep=",",row.names=T,col.names=T)
# Print heatmap of differentially accessible peaks
s_comb_mo_da_peaks = read.table("/scratch/groups/khavari/users/andrewji/sb_multiome/s_comb_mo/s_comb_mo_kc_int_filt_res0.4_predict_da_peaks.csv",sep=",",row.names=1,header=T,stringsAsFactors=F)
s_comb_mo_da_peaks_sig = subset(s_comb_mo_da_peaks, p_val < 0.005)
s_comb_mo_exp = as.matrix(s_comb_mo[["ATAC"]]@data)
## Warning in asMethod(object): sparse->dense coercion: allocating vector of size
## 3.7 GiB
# Calculate average accessibility per predicted cluster (exclude clus 9 given low number of cells)
comb_avg_exp = c()
clus = c(0:8,10)
for (i in 1:length(clus)) {
cells = WhichCells(s_comb_mo, idents = clus[i])
exp_mat = s_comb_mo_exp[,cells]
if (length(cells)>1) {avg = apply(exp_mat,1,mean)}
else {avg = exp_mat}
comb_avg_exp = cbind(comb_avg_exp,avg)
}
colnames(comb_avg_exp) = clus
col.pal <- rev(RColorBrewer::brewer.pal(11, "RdBu"))
da_peaks_sig = unique(s_comb_mo_da_peaks_sig$gene)
atac_heat_map = pheatmap(comb_avg_exp[da_peaks_sig,as.character(c(3,4,2,1,10))], show_rownames = F, cluster_cols = T, cluster_rows = T, main = "Peaks", scale = "row",color = col.pal)

setwd("/scratch/groups/khavari/users/andrewji/sb_multiome/s_comb_mo/render")
ggsave(atac_heat_map,file="s_comb_mo_sub_da_peaks_p_0.005_clustered.pdf",height = 10, width=10)
# Make bar plots of significantly up/down accessible peaks in clusters 3,4,2,1,0 (main predicted basal, spionous, and granular clusters)
sub_sig = subset(s_comb_mo_da_peaks_sig, cluster %in% c(1,2,3,4,10))
sub_sig_up = subset(sub_sig, avg_log2FC > 0)
sub_sig_down = subset(sub_sig,avg_log2FC<0)
peak_table_up = table(sub_sig_up$cluster)
peak_table_down = table(sub_sig_down$cluster)
peak_df = rbind(peak_table_up,peak_table_down)
rownames(peak_df) = c("Up","Down")
peak_df_melt = reshape2::melt(peak_df)
peak_df_melt$Var2 <- as.factor(peak_df_melt$Var2)
peak_df_melt$Var2 = factor(peak_df_melt$Var2, levels = c(3,4,2,1,10))
peak_df_melt$new_value <- with(peak_df_melt, ifelse(Var1 == "Up", value, -value))
#new_df_melt <- with(peak_df_melt, ifelse(Var1 == "Down", value, -value))
g = ggplot(peak_df_melt, aes(x=Var2, y=new_value, fill=Var1)) +
geom_bar(stat="identity", position="identity") + ylab("Number of Differentially Accessible Peaks") + xlab("Predicted Cluster")
print(g)

ggsave(filename = "s_comb_mo_num_da_peaks_sub_clus.pdf",g,width = 5,height = 6, useDingbats=F)
setwd("/scratch/groups/khavari/users/andrewji/sb_multiome/s_comb_mo/render")
saveRDS(s_comb_mo,file="s_comb_mo.Rds")
sessionInfo()
## R version 4.2.0 (2022-04-22)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: CentOS Linux 7 (Core)
##
## Matrix products: default
## BLAS/LAPACK: /share/software/user/open/openblas/0.3.10/lib/libopenblas_haswellp-r0.3.10.so
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] stats4 stats graphics grDevices utils datasets methods
## [8] base
##
## other attached packages:
## [1] pheatmap_1.0.12 future_1.30.0
## [3] hdf5r_1.3.7 dplyr_1.0.10
## [5] Matrix_1.5-3 patchwork_1.1.2
## [7] ggplot2_3.4.0 SeuratObject_4.1.3
## [9] Seurat_4.3.0 Signac_1.9.0
## [11] EnsDb.Hsapiens.v86_2.99.0 ensembldb_2.22.0
## [13] AnnotationFilter_1.22.0 GenomicFeatures_1.50.3
## [15] AnnotationDbi_1.60.0 Biobase_2.58.0
## [17] GenomicRanges_1.50.2 GenomeInfoDb_1.34.4
## [19] IRanges_2.32.0 S4Vectors_0.36.1
## [21] BiocGenerics_0.44.0
##
## loaded via a namespace (and not attached):
## [1] utf8_1.2.2 spatstat.explore_3.0-5
## [3] reticulate_1.26 tidyselect_1.2.0
## [5] RSQLite_2.2.20 htmlwidgets_1.6.0
## [7] grid_4.2.0 BiocParallel_1.32.5
## [9] Rtsne_0.16 munsell_0.5.0
## [11] codetools_0.2-18 ica_1.0-3
## [13] interp_1.1-3 miniUI_0.1.1.1
## [15] withr_2.5.0 spatstat.random_3.0-1
## [17] colorspace_2.0-3 progressr_0.12.0
## [19] filelock_1.0.2 highr_0.9
## [21] knitr_1.39 rstudioapi_0.14
## [23] ROCR_1.0-11 tensor_1.5
## [25] listenv_0.9.0 MatrixGenerics_1.10.0
## [27] labeling_0.4.2 GenomeInfoDbData_1.2.9
## [29] polyclip_1.10-4 farver_2.1.1
## [31] bit64_4.0.5 parallelly_1.33.0
## [33] vctrs_0.5.1 generics_0.1.3
## [35] xfun_0.32 biovizBase_1.46.0
## [37] BiocFileCache_2.6.0 R6_2.5.1
## [39] bitops_1.0-7 spatstat.utils_3.0-1
## [41] cachem_1.0.6 DelayedArray_0.24.0
## [43] assertthat_0.2.1 promises_1.2.0.1
## [45] BiocIO_1.8.0 scales_1.2.1
## [47] nnet_7.3-17 gtable_0.3.1
## [49] globals_0.16.2 goftest_1.2-3
## [51] rlang_1.0.6 RcppRoll_0.3.0
## [53] splines_4.2.0 rtracklayer_1.58.0
## [55] lazyeval_0.2.2 dichromat_2.0-0.1
## [57] checkmate_2.1.0 spatstat.geom_3.0-3
## [59] yaml_2.3.5 reshape2_1.4.4
## [61] abind_1.4-5 backports_1.4.1
## [63] httpuv_1.6.7 Hmisc_4.7-2
## [65] tools_4.2.0 ellipsis_0.3.2
## [67] jquerylib_0.1.4 RColorBrewer_1.1-3
## [69] ggridges_0.5.4 Rcpp_1.0.9
## [71] plyr_1.8.8 base64enc_0.1-3
## [73] progress_1.2.2 zlibbioc_1.44.0
## [75] purrr_0.3.4 RCurl_1.98-1.9
## [77] prettyunits_1.1.1 rpart_4.1.16
## [79] deldir_1.0-6 pbapply_1.6-0
## [81] cowplot_1.1.1 zoo_1.8-11
## [83] SummarizedExperiment_1.28.0 ggrepel_0.9.2
## [85] cluster_2.1.3 magrittr_2.0.3
## [87] data.table_1.14.6 scattermore_0.8
## [89] lmtest_0.9-40 RANN_2.6.1
## [91] ProtGenerics_1.30.0 fitdistrplus_1.1-8
## [93] matrixStats_0.63.0 hms_1.1.2
## [95] mime_0.12 evaluate_0.16
## [97] xtable_1.8-4 XML_3.99-0.13
## [99] jpeg_0.1-10 gridExtra_2.3
## [101] compiler_4.2.0 biomaRt_2.54.0
## [103] tibble_3.1.8 KernSmooth_2.23-20
## [105] crayon_1.5.1 htmltools_0.5.4
## [107] later_1.3.0 Formula_1.2-4
## [109] tidyr_1.2.1 DBI_1.1.3
## [111] dbplyr_2.2.1 MASS_7.3-56
## [113] rappdirs_0.3.3 cli_3.5.0
## [115] parallel_4.2.0 igraph_1.3.5
## [117] pkgconfig_2.0.3 GenomicAlignments_1.34.0
## [119] foreign_0.8-82 sp_1.5-1
## [121] plotly_4.10.1 spatstat.sparse_3.0-0
## [123] xml2_1.3.3 bslib_0.4.0
## [125] XVector_0.38.0 VariantAnnotation_1.44.0
## [127] stringr_1.4.0 digest_0.6.29
## [129] sctransform_0.3.5 RcppAnnoy_0.0.20
## [131] spatstat.data_3.0-0 Biostrings_2.66.0
## [133] rmarkdown_2.15 leiden_0.4.3
## [135] fastmatch_1.1-3 htmlTable_2.4.1
## [137] uwot_0.1.14 restfulr_0.0.15
## [139] curl_4.3.3 shiny_1.7.4
## [141] Rsamtools_2.14.0 rjson_0.2.21
## [143] lifecycle_1.0.3 nlme_3.1-157
## [145] jsonlite_1.8.4 BSgenome_1.66.1
## [147] viridisLite_0.4.1 fansi_1.0.3
## [149] pillar_1.8.1 lattice_0.20-45
## [151] KEGGREST_1.38.0 fastmap_1.1.0
## [153] httr_1.4.4 survival_3.3-1
## [155] glue_1.6.2 png_0.1-8
## [157] bit_4.0.5 stringi_1.7.8
## [159] sass_0.4.2 blob_1.2.3
## [161] latticeExtra_0.6-30 memoise_2.0.1
## [163] irlba_2.3.5.1 future.apply_1.10.0